### Replication of Study 2, Part B

fitmodel <- function(f){  # model fit function
  lm(f, data=fake.data)
}

#############################################################################################
### Experimental Conditions

for(S in 1:5){

n <- 1000
k <- 10
q <- 5
r2 <- .5
b.vec <- rep(c(.25, .5, .75, 1, 1.25)[S], q) # Coefficient of explanatory variable
     
s <- 1000 # Number of Simulations
result.array <- array(NA, c(6,8,s))

ptm <- proc.time()

for(i in 1:s){
  
  print(paste("Experiment: 2B", " Condition: ", S, " Simulation: ", i, sep=""))
  

#######################################################################################################################
### Simulate Data Set
#c.vec <-  rep(0, k) # Random correlations
#M <- huerlimann(k, c.vec)
c.vec <-  runif(k, -1, 1) # Random correlations
M <- huerlimann(k, c.vec)
X <- rmvnorm(n, mean=rep(0, k), sigma = M)

### DGP
y <- as.matrix(X[, 1:q]) %*% b.vec 
res <- rnorm(n, 0, 8) # Fix this bit to get SE of around .25 ! 
y <- y + res

fake.data <- as.data.frame(cbind(y, X))
colnames(fake.data) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10")
#colnames(fake.data) <- c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10",
#                        "x11", "x12", "x13", "x14", "x15")
#                         ,"x16", "x17", "x18", "x19", "x20")

#######################################################################################################################

#### Generate Variable Combinations
X <- fake.data[,-1]

combs <- do.call(expand.grid, rep(list(c(FALSE, TRUE)), ncol(X)))[-1,]

### Assign Variable Names
vars <- apply(combs, 1, function(i) names(X)[i]) 

form <- paste("y ~ ", lapply(vars, paste, collapse = " + "), sep = "")                
form <- lapply(form, as.formula)                                                                 

### Fit Models
#ptm <- proc.time()
models <- lapply(form, fitmodel) # fit models
#proc.time() - ptm

### Post-Process Models
names(models) <- seq_along(models) # assign model ID
mods <- mod_summary(models) # summary of models
coefs <- coef_summary(models) # summary of coefficients

coefs_wide_b <- acast(coefs, model ~ variable, value.var=c("Est"))
coefs_wide_se <- acast(coefs, model ~ variable, value.var=c("SE"))
colnames(coefs_wide_se) <- paste("SE", colnames(coefs_wide_b) , sep="")
coefs <- cbind(coefs_wide_b, coefs_wide_se)
coefs <- coefs[,order(colnames(coefs), decreasing=T)]
model <- c(1:dim(mods)[1])
coefs <- cbind(model, coefs)

DATA <- merge(mods, coefs, by="model")
###############################################################################
# EBA vs. Model Averaging

K <- k
Q <- q
true.b <- c(b.vec, rep(0, K-Q))

## EBA (Leamer)
eb.min <- apply(DATA[,7:(6+K)], 2, min, na.rm=T)
eb.max <- apply(DATA[,7:(6+K)], 2, max, na.rm=T)

index.min <- apply(DATA[,7:(6+K)], 2, function(i){which(i==min(i,na.rm=T))})
index.max <- apply(DATA[,7:(6+K)], 2, function(i){which(i==max(i,na.rm=T))})

if(class(index.min)=="list"){
  index.min <- unlist(lapply(index.min, min))
}

if(class(index.max)=="list"){
  index.max <- unlist(lapply(index.max, max))
}

se.min <- DATA[,(7+K):(7+K+K-1)][cbind(index.min, c(1:K))]
se.max <- DATA[,(7+K):(7+K+K-1)][cbind(index.max, c(1:K))]

# Dreher
qu.1 <- apply(DATA[,7:(6+K)], 2, quantile, .1, na.rm=T)
qu.9 <- apply(DATA[,7:(6+K)], 2, quantile, .9, na.rm=T)

# Model Averaging
ma.ew <- apply(DATA[,7:(6+K)], 2, mean, na.rm=T) 
var.b.ew <- apply(DATA[,(7+K):(7+K+K-1)], 2, mean, na.rm=T) 
var.b.ew <- var.b.ew^2
var.ew <- apply(DATA[,7:(6+K)], 2, var, na.rm=T)
sd.ew <- sqrt(var.b.ew + var.ew)

bic <- DATA$BIC/1000 # rescale BIC 
w <- (exp(-bic/2))/(sum(exp(-bic/2))) # weight (cf. Moral-Benito 2010: 19)
ma.w.bic <- apply(DATA[,7:(6+K)], 2, function(i){sum(i * w, na.rm=T)})
var.b.w.bic <- apply(DATA[,(7+K):(7+K+K-1)], 2, function(i){sum((i)^2 * w, na.rm=T)})
var.w.bic <- apply(DATA[,7:(6+K)], 2, function(i){sum( (i-mean(i, na.rm=T))^2 * w, na.rm=T)})
sd.w.bic <- sqrt(var.b.w.bic + var.w.bic)

dev <- DATA$Deviance/1000 # rescale Deviance 
L <- exp(-dev/2) # Likelihood
w <- L/sum(L) # weight 
ma.w.ll <- apply(DATA[,7:(6+K)], 2, function(i){sum(i * w, na.rm=T)})
var.b.w.ll <- apply(DATA[,(7+K):(7+K+K-1)], 2, function(i){sum((i)^2 * w, na.rm=T)}) 
sd.w.ll <- sqrt(var.b.w.ll)

############################################################################################################################
# Robustness Criteria
in.out.eb.s <- 0<eb.min*eb.max & 1.96<abs(eb.min/se.min) & 1.96<abs(eb.max/se.max) # EBA: min and max same sign & significant
in.out.eb <- 0<eb.min*eb.max # EBA: min and max same sign
in.out.ma <- 1.96<abs(ma.ew/sd.ew) # Unweighted Model Averaging
in.out.ma.sim <- .95<apply(cbind(pnorm(0, ma.w.ll, sd.w.ll), 1-pnorm(0, ma.w.ll, sd.w.ll)), 1, max) # Sala-i-Martin: Model Averaging weighted by LL
in.out.ma.bic <- 1.96<abs(ma.w.bic/sd.w.bic) # Model Averaging weighted by BIC 
in.out.d90 <- 0<qu.1*qu.9 # Dreher: 90% quantiles same sign

beta.mat <- rbind(in.out.eb.s, in.out.eb, in.out.ma, in.out.ma.sim, in.out.ma.bic, in.out.d90, ma.ew, ma.w.ll, ma.w.bic, qu.1, eb.min)

colnames(beta.mat) <- c(9, 8, 7, 6, 5, 4, 3, 2, 10, 1)
#colnames(beta.mat) <- c(9, 8, 7, 6, 5, 4, 3, 2, 14, 13, 12, 11, 10, 1)

beta.mat <- beta.mat[,order(as.numeric(colnames(beta.mat)), decreasing=F)]

#############################################################################################################################
# Predictive Criteria

num.robust <- apply(beta.mat[1:6, 1:K], 1, sum) # Number of variables declared "robust"

  true.pos <- apply(beta.mat[1:6, 1:Q], 1, sum) # Number of True Positives
  false.pos <- apply(beta.mat[1:6, (Q+1):K], 1, sum) # Number of FP
  true.neg <- (K-Q)-false.pos
  false.neg <- Q-true.pos

tpr <- true.pos/(true.pos+false.neg) # TPR
tnr <- true.neg/(true.neg+false.pos) # TNR  

correct.sign.ma.ew <- sum(beta.mat["ma.ew",1:Q]>0)/sum(true.b[1:Q]>0)  ### Correct Sign
correct.sign.ma.w.ll <- sum(beta.mat["ma.w.ll",1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.ma.w.bic <- sum(beta.mat["ma.w.bic",1:Q]>0)/sum(true.b[1:Q]>0)

correct.sign.d90 <-  sum(beta.mat["in.out.d90", 1:Q]==1 & beta.mat["qu.1", 1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.eb <-  sum(beta.mat["in.out.eb", 1:Q]==1 & beta.mat["eb.min", 1:Q]>0)/sum(true.b[1:Q]>0)
correct.sign.eb.s <-  sum(beta.mat["in.out.eb.s", 1:Q]==1 & beta.mat["eb.min", 1:Q]>0)/sum(true.b[1:Q]>0)

correct.sign <- c(correct.sign.eb.s, correct.sign.eb, correct.sign.ma.ew, correct.sign.ma.w.ll, correct.sign.ma.w.bic, correct.sign.d90)

rmse.ew <- sqrt(mean((true.b - beta.mat["ma.ew",])^2)) ## RMSE 
rmse.w.ll <- sqrt(mean((true.b - beta.mat["ma.w.ll",])^2))
rmse.w.bic <- sqrt(mean((true.b - beta.mat["ma.w.bic",])^2))

rmse <- c(NA, NA, rmse.ew, rmse.w.ll, rmse.w.bic, NA)

corr.ew <- cor(true.b, beta.mat["ma.ew",]) ## Correlation with true b
corr.w.ll <- cor(true.b, beta.mat["ma.w.ll",])
corr.w.bic <- cor(true.b, beta.mat["ma.w.bic",])

corrs <- c(NA, NA, corr.ew, corr.w.ll, corr.w.bic, NA)

result.mat <- cbind(num.robust, true.pos, false.pos, true.neg, false.neg, correct.sign, rmse, corrs) 
result.array[,,i] <- result.mat  


}

time.needed <- proc.time()-ptm

colnames(result.array) <- c("num.robust", "true.pos", "false.pos", "true.neg", "false.neg", "correct.sign", "rmse", "corrs")
rownames(result.array) <- c("eb.s", "eb", "ma", "ma.sim", "ma.bic", "d90")


saveRDS(result.array, paste("five_runif_result_mat_", S, ".rds", sep=""))

}



